Starting from importing packages until data cleaning is essential. Remaining code blocks are independent of each other, can be ran individually.
# inputs
dataset_path <- "./Data/"
function_path <- "./Functions/"
# outputs
itrt_plot_path <- "./Output/InteractivePlot/"
sttc_plot_path <- "./Output/StaticPlot/"
out_data_path <- "./Output/Data/"
daily_covid <- import(
paste0(dataset_path, "worldometer_coronavirus_daily_data.csv")
)
summary_covid <- import(
paste0(dataset_path, "worldometer_coronavirus_summary_data.csv")
)
head(daily_covid)
## date country cumulative_total_cases daily_new_cases active_cases
## 1 2020-02-15 Afghanistan 0 NA 0
## 2 2020-02-16 Afghanistan 0 NA 0
## 3 2020-02-17 Afghanistan 0 NA 0
## 4 2020-02-18 Afghanistan 0 NA 0
## 5 2020-02-19 Afghanistan 0 NA 0
## 6 2020-02-20 Afghanistan 0 NA 0
## cumulative_total_deaths daily_new_deaths
## 1 0 NA
## 2 0 NA
## 3 0 NA
## 4 0 NA
## 5 0 NA
## 6 0 NA
head(summary_covid)
## country continent total_confirmed total_deaths total_recovered
## 1 Afghanistan Asia 158275 7367 145750
## 2 Albania Europe 213257 3228 202077
## 3 Algeria Africa 220415 6310 151347
## 4 Andorra Europe 25289 141 21511
## 5 Angola Africa 86636 1789 67477
## 6 Anguilla North America 1777 6 1702
## active_cases serious_or_critical total_cases_per_1m_population
## 1 5158 1124 3932
## 2 7952 23 74227
## 3 62758 34 4893
## 4 3637 31 326512
## 5 17370 7 2518
## 6 69 NA 116869
## total_deaths_per_1m_population total_tests total_tests_per_1m_population
## 1 183 826810 20541
## 2 1124 1495002 520354
## 3 140 230861 5125
## 4 1820 249838 3225714
## 5 52 1296669 37686
## 6 395 51382 3379283
## population
## 1 40250878
## 2 2873049
## 3 45046063
## 4 77452
## 5 34407243
## 6 15205
daily_covid <-
daily_covid %>%
replace(is.na(.), 0) %>%
mutate(date = as.Date(date))
What is the overview of covid cases?
## global percentage of death, active case and recovered ##
# sum vertically
categories <- c("total_deaths", "total_recovered", "active_cases")
category <- str_replace_all(categories, pattern = "_", replacement = " ")
category <- str_to_title(category)
data <-
summary_covid[, categories] %>%
colSums(na.rm = T)
data <- data.frame(
category=category,
count=data
)
data$prettyCount <- prettyNum(data$count, big.mark = ",", scientific = F)
# Compute percentages
data$fraction <- data$count / sum(data$count)
# Compute the cumulative percentages (top of each rectangle)
data$ymax <- cumsum(data$fraction)
# Compute the bottom of each rectangle
data$ymin <- c(0, head(data$ymax, n=-1))
# Compute label position
data$labelPosition <- (data$ymax + data$ymin) / 2
# Compute display percentages
data$prettyFraction <- percent(data$fraction)
# Make the plot
q1 <-
ggplot(data, aes(ymax=ymax, ymin=ymin, xmax=4, xmin=3, fill=category)) +
geom_rect() +
geom_text(
x=4.3,
aes(y=labelPosition, label=prettyCount, color=category, fontface="bold"),
size=3.5
) + # x here controls label position (inner / outer)
geom_text(
x=3.5,
aes(y=labelPosition, label=prettyFraction, fontface="bold"),
color="white",
size=4
) +
scale_fill_brewer(palette="Set2") +
scale_color_brewer(palette="Set2") +
coord_polar(theta="y") +
xlim(c(2, 4)) +
theme_void() +
annotate(
geom = "text",
x = 2,
y = 0,
colour = "#eba834",
label = paste0(
"Total Cases\n",
prettyNum(sum(data$count), big.mark = ",", scientific = F)
)
)
ggsave(paste0(sttc_plot_path, "qa1_cases_proportion.png"))
## Saving 7 x 5 in image
q1
What is the scale of infected population in different continent or country?
## comparison of cases between different continent ##
q2 <-
summary_covid %>% # data
select(country:active_cases) %>%
group_by(continent) %>% # group_by
filter(total_confirmed > quantile(total_confirmed, 0.7)) %>% # removing small cases
ungroup() %>%
group_by(continent, country) %>%
# turning 3 columes into sub sub group (wide to long conversion)
gather(category, count, total_recovered, active_cases, total_deaths, factor_key=T) %>%
ungroup() %>%
mutate(category = factor(category, labels = c("Recovered", "Active Cases", "Deaths"))) %>%
treemap( index=c("continent","country","category"),
vSize="count",
type="index",
palette = "Set2",
title = "Group by continent top 70 percentile confirmed cases",
align.labels=list(
c("center", "center"),
c("left", "top"),
c("left", "bottom")
)
)
itrt_q2 <- d3tree2( q2 , rootname = "Group by continent top 70 percentile confirmed cases" )
saveWidget(itrt_q2, file = paste0(itrt_plot_path, "qa2_infection_scale.html"))
itrt_q2
How many people suffered from covid?
# overview of accumulated cases vs date for all the country
# global stacked area plot
data <-
# group by summation
daily_covid %>%
group_by(date) %>%
summarise(
cumulative_total_cases = sum(cumulative_total_cases, na.rm = T),
cumulative_total_deaths = sum(cumulative_total_deaths, na.rm = T),
) %>%
# convert wide columns to long rows
gather(categories, count,
cumulative_total_cases, cumulative_total_deaths) %>%
#
rowwise() %>%
mutate(text =
paste(
str_to_title(last(strsplit(categories, "_")[[1]])),
"Count:", comma(count),
"\nDate:", as.Date(date, format = "%d %b %Y")
)
) %>%
arrange(date) # this is just to check if text is appended properly
write.csv(data, paste0(out_data_path, "qa3_cumulative_cases_by_date.csv"))
facet_labels <- c(
'cumulative_total_cases'="Cumulative Cases",
'cumulative_total_deaths'="Cumulative Deaths"
)
q3 <- # I can't use any function in the text argument \|/
ggplot(data, aes(x=date, y=count, group=categories, fill=categories, text = text)) +
geom_area(alpha=0.8 , size=0.5, color="black") +
facet_wrap(~categories, scales = "free_y", labeller = as_labeller(facet_labels)) +
scale_fill_viridis(discrete = T, option="B", begin = 0.3, end = 0.7) +
scale_x_date(date_labels = "%b %Y") +
scale_y_continuous(labels = unit_format(unit = "M", scale = 1e-6)) +
theme(legend.position="none") +
ggtitle("Cumulative Covid Cases") +
ylab("Covid Cases") +
xlab("Date") +
theme_ipsum() +
theme(legend.position="none", axis.text.x = element_text(angle=45, hjust = 1))
ggsave(paste0(sttc_plot_path, "qa3_cumulative_cases_by_date.png"))
## Saving 7 x 5 in image
itrt_q3 <- ggplotly(q3, tooltip = "text")
saveWidget(itrt_q3, file = paste0(itrt_plot_path, "qa3_cumulative_cases_by_date.html"))
# q3
itrt_q3
Which country has the most cases?
# ranking of cases for the top n countries
# hist
How did the number of active cases evolve starting from the beginning?
# ALERT !!! THIS SECTION CREATES GIF PLOTS, IT REQUIRE ROUGHLY 5 MINUTES TO RUN
# This block of code is disabled, remove eval=F to evaluate
# 100 MB of images will be generated during the process
# The final GIF is around 20MD with a resolution of 1920 x 1065, 8 fps
# An extra standalone R script (./qa5.R) is available for this section
# In case that it doesn't work in Rmarkdown
# map, gif encoder, list sorting
pacman::p_load(maps,gifski,gtools)
# map boarder data
world <- map_data("world")
# loop config
date_seq <- seq(as.Date(min(daily_covid$date)),
as.Date(max(daily_covid$date)), "days")
print_frequrency <- 50
len_date_seq <- length(date_seq)
max_active_cases <- max(daily_covid$active_cases)
min_active_cases <- min(daily_covid$active_cases)
# generate active cases world map images
for (i in c(1:len_date_seq)) {
current_date <- as.Date(date_seq[i])
date_daily_covid <- filter(daily_covid, date == current_date)
mapdata <- left_join(world, date_daily_covid, by= c("region" = "country"))
map <-
ggplot(mapdata, aes(x=long, y=lat, group=group)) +
geom_polygon(
aes(fill = active_cases),
color="black",
size= 0.2
) +
scale_fill_distiller(
name="Active Cases",
palette = "Spectral",
na.value = "grey50",
trans = "log10",
limits= c(min_active_cases, max_active_cases)
) +
ggtitle(paste0("Date: ", current_date)) +
xlab(element_blank()) +
ylab(element_blank()) +
guides(fill = guide_colourbar(
barwidth = 0.5,
barheight = 10,
ticks = F
)) +
theme(
plot.title = element_text(size=12),
panel.background = element_rect(
colour = "black",
fill = "white",
size = 0.2
),
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank(),
panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank()
)
# save plot
suppressMessages(ggsave(
plot = map,
filename = paste0(itrt_plot_path, "/qa5_map/", i,".png")
))
# print log
if ((i %% print_frequrency) == 0) {
print(paste(i, "/", len_date_seq))
}
}
# load png paths and convert it into gif
png_files <- list.files(paste0(itrt_plot_path, "qa5_map/"),
pattern = ".*png$", full.names = TRUE)
png_files <- mixedsort(sort(png_files))
gifski(png_files, gif_file = paste0(itrt_plot_path, "qa5_map_08fps.gif"),
width = 1920, height = 1065, delay = 0.125)
pacman::p_unload(maps,gifski,gtools)
Generated Images
715 images located in ./Output/InteractivePlot/qa5_map/
Generated GIF